# Week 4 Data Visualization Lab

# Install the package ggplot2
#install.packages("ggplot2")

# Any time I want to use this package, I need to load it
library(ggplot2)

# Open the built-in `cars` data set in the spreadsheet-style data viewer.
# View() is only useful when someone is at the console, so it is skipped when
# the script is run non-interactively (e.g. with Rscript or while knitting).
if (interactive()) {
  View(cars)
}

# A quick base R scatterplot of the same data - this is not ggplot
plot(cars)

# Our first ggplot
# Every ggplot needs three ingredients: data + aesthetic mapping + geometry.
# The speed-vs-distance scatterplot is built once, stored as `p` so the
# layers below can be added to it, and then printed.
p <- ggplot(cars, aes(x = speed, y = dist)) +
  geom_point()
p

# Connect the points with geom_line()
p + geom_line()

# Add a smoothed trend line that follows the data closely (default smoother)
p + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same plot, but with a straight linear-model fit
p + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Final version: linear fit without the confidence band, plus labels and a
# black-and-white theme
p +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() +
  labs(
    title = "Speed and Stopping Distances of Cars",
    subtitle = "Your informative subtitle text here",
    x = "Speed (MPH)",
    y = "Stopping Distance (ft)",
    caption = "Dataset: 'cars'"
  )
## `geom_smooth()` using formula 'y ~ x'

##

# Read in the drug-treatment expression data (tab-delimited text file)

url <- "https://bioboot.github.io/bimm143_S20/class-material/up_down_expression.txt"

genes <- read.delim(file = url)
head(genes)
##         Gene Condition1 Condition2      State
## 1      A4GNT -3.6808610 -3.4401355 unchanging
## 2       AAAS  4.5479580  4.3864126 unchanging
## 3      AASDH  3.7190695  3.4787276 unchanging
## 4       AATF  5.0784720  5.0151916 unchanging
## 5       AATK  0.4711421  0.5598642 unchanging
## 6 AB015752.4 -3.6808610 -3.5921390 unchanging
# Number of genes (one row per gene)
nrow(genes)
## [1] 5196
# Column names and number of columns
colnames(genes)
## [1] "Gene"       "Condition1" "Condition2" "State"
ncol(genes)
## [1] 4
# Number of genes in each State (the "up" entry is the up-regulated count)
table(genes[["State"]])
## 
##       down unchanging         up 
##         72       4997        127
# Percentage of all genes in each State, rounded to 2 decimal places
round(table(genes[["State"]]) / nrow(genes) * 100, digits = 2)
## 
##       down unchanging         up 
##       1.39      96.17       2.44
# First plot attempt: Condition1 vs Condition2, one point per gene, colored
# by State using ggplot2's default discrete colors
ggplot(data = genes) +
  aes(x = Condition1, y = Condition2, col = State) +
  geom_point()

# Change colors and add labels.
# The palette is a *named* vector so that each State value is tied to its
# color explicitly. An unnamed vector is matched to the State values by
# position (alphabetical order here), which would silently swap colors if the
# set of states ever changed.
ggplot(data = genes) +
  aes(x = Condition1, y = Condition2, col = State) +
  geom_point() +
  scale_color_manual(
    values = c(down = "blue", unchanging = "gray", up = "red")
  ) +
  labs(title = "Gene Expression Changes Upon Drug Treatment",
       x = "Control (no drug)",
       y = "Drug Treatment") +
  theme_bw()

##

#Optional Part 6

# install.packages("gapminder")
library(gapminder)

# Read the gapminder table from its online location (tab-separated file).
# Note: this creates a `gapminder` data frame in the workspace, under the same
# name as the data set that ships with the package loaded above.
url2 <- "https://raw.githubusercontent.com/jennybc/gapminder/master/inst/extdata/gapminder.tsv"
gapminder <- read.delim(file = url2)

# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows for the year 2007
gapminder_2007 <- filter(gapminder, year == 2007)

# 2007: GDP per capita vs life expectancy, colored by continent and sized by
# population; points are semi-transparent so overlaps stay visible
ggplot(
  gapminder_2007,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point(alpha = 0.5)

# Size by pop only (no color mapping)
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(alpha = 0.5)

# Scale the point area so it reflects actual population differences
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(alpha = 0.5) +
  scale_size_area(max_size = 10)

# 1957 plot
gapminder_1957 <- gapminder %>%
  filter(year == 1957)

# GDP per capita vs life expectancy in 1957, colored by continent, with point
# area scaled to population.
# The size scale must be chained onto the plot with `+`. Written as a separate
# statement it is never applied: the plot is drawn with the default size scale
# and the scale object is merely printed to the console.
ggplot(gapminder_1957) +
  aes(x = gdpPercap, y = lifeExp,
      color = continent,
      size = pop) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 10)
# Combine 1957 and 2007: keep the rows for both years in one data frame
gapminder_combined <- gapminder %>%
  filter(year %in% c(1957, 2007))

# Same bubble plot as above, drawn as one panel per year
ggplot(
  gapminder_combined,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 10) +
  facet_wrap(vars(year))

##

#Optional Part 7

# The five most populous countries in 2007, largest first.
# slice_max() replaces the superseded top_n(): it ranks the rows by `pop` and
# returns them in descending order, so no separate arrange(desc(pop)) step is
# needed.
gapminder_top5 <- gapminder %>%
  filter(year == 2007) %>%
  slice_max(order_by = pop, n = 5)
gapminder_top5
##         country continent year lifeExp        pop gdpPercap
## 1         China      Asia 2007  72.961 1318683096  4959.115
## 2         India      Asia 2007  64.698 1110396331  2452.210
## 3 United States  Americas 2007  78.242  301139947 42951.653
## 4     Indonesia      Asia 2007  70.650  223547000  3540.652
## 5        Brazil  Americas 2007  72.390  190010647  9065.801
# Creating a bar chart: one bar per country, bar height = population
ggplot(gapminder_top5, aes(x = country, y = pop)) +
  geom_col()

# Same chart with life expectancy as the bar height
ggplot(gapminder_top5, aes(x = country, y = lifeExp)) +
  geom_col()

# Filling bars with color: a categorical variable (continent) ...
ggplot(gapminder_top5, aes(x = country, y = pop, fill = continent)) +
  geom_col()

# ... and a numeric variable (life expectancy)
ggplot(gapminder_top5, aes(x = country, y = pop, fill = lifeExp)) +
  geom_col()

# Population size by country, bars filled by GDP per capita
ggplot(gapminder_top5, aes(x = country, y = pop, fill = gdpPercap)) +
  geom_col()

# Change the order of the bars: sort countries by decreasing population
ggplot(
  gapminder_top5,
  aes(x = reorder(country, -pop), y = pop, fill = gdpPercap)
) +
  geom_col()

# Bars sorted by decreasing population, one fill color per country, with a
# dark gray outline. The fill legend would only repeat the x-axis labels, so
# it is removed. `guides(fill = "none")` is the supported spelling;
# `guides(fill = FALSE)` is deprecated and emits a warning.
ggplot(gapminder_top5) +
  aes(x = reorder(country, -pop), y = pop,
      fill = country) +
  geom_col(col = "gray30") +
  guides(fill = "none")

# Flipping bar charts
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
# The state names only exist as row names; copy them into a regular column so
# they can be mapped to an aesthetic
USArrests[["State"]] <- rownames(USArrests)

# Horizontal bar chart of murder values, states ordered by Murder
ggplot(USArrests, aes(x = reorder(State, Murder), y = Murder)) +
  geom_col() +
  coord_flip()

# Same data drawn as points, each joined to zero by a blue segment
ggplot(USArrests, aes(x = reorder(State, Murder), y = Murder)) +
  geom_point() +
  geom_segment(
    mapping = aes(x = State, xend = State, y = 0, yend = Murder),
    color = "blue"
  ) +
  coord_flip()

##

#Optional Part 8

# install.packages("gifski")
# install.packages("gganimate")

library(gapminder)
library(gganimate)

# Start from a regular ggplot of the gapminder data: GDP per capita (log10
# axis) vs life expectancy, point size by population, color by country
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  # One panel per continent
  facet_wrap(vars(continent)) +
  # The gganimate-specific pieces start here: the title refers to
  # {frame_time}, and the animation steps through `year`
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  shadow_wake(wake_length = 0.1, alpha = FALSE)

##

#Optional Part 9

# Combining plots

# install.packages("patchwork")
library(patchwork)

# Four example plots built from mtcars: scatter, boxplot per gear, smoothed
# trend, and a bar chart of carb counts
p1 <- ggplot(data = mtcars) +
  geom_point(mapping = aes(x = mpg, y = disp))
p2 <- ggplot(data = mtcars) +
  geom_boxplot(mapping = aes(x = gear, y = disp, group = gear))
p3 <- ggplot(data = mtcars) +
  geom_smooth(mapping = aes(x = disp, y = qsec))
p4 <- ggplot(data = mtcars) +
  geom_bar(mapping = aes(x = carb))

# patchwork layout: p1, p2 and p3 side by side on top, p4 underneath
(p1 | p2 | p3) /
  p4
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Record the R version and the package versions used for this run
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] patchwork_1.1.1 gganimate_1.0.7 dplyr_1.0.7     gapminder_0.3.0
## [5] ggplot2_3.3.5  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8        plyr_1.8.6        pillar_1.6.4      compiler_4.1.2   
##  [5] jquerylib_0.1.4   highr_0.9         prettyunits_1.1.1 progress_1.2.2   
##  [9] tools_4.1.2       digest_0.6.27     lattice_0.20-45   nlme_3.1-155     
## [13] evaluate_0.14     lifecycle_1.0.1   tibble_3.1.6      gtable_0.3.0     
## [17] mgcv_1.8-38       pkgconfig_2.0.3   rlang_0.4.11      Matrix_1.4-0     
## [21] DBI_1.1.2         yaml_2.2.1        xfun_0.29         withr_2.4.3      
## [25] stringr_1.4.0     knitr_1.37        hms_1.1.1         generics_0.1.1   
## [29] vctrs_0.3.8       grid_4.1.2        tidyselect_1.1.1  glue_1.6.0       
## [33] R6_2.5.1          gifski_1.4.3-1    fansi_0.5.0       rmarkdown_2.11   
## [37] tweenr_1.0.2      farver_2.1.0      purrr_0.3.4       magrittr_2.0.1   
## [41] splines_4.1.2     scales_1.1.1      ellipsis_0.3.2    htmltools_0.5.1.1
## [45] assertthat_0.2.1  colorspace_2.0-2  labeling_0.4.2    utf8_1.2.2       
## [49] stringi_1.7.6     munsell_0.5.0     crayon_1.4.2